The following document includes the code required to generate supplementary figures for the paper:
“Spatial Transcriptomic Profiling of the Human Fallopian Tube Epithelium Reveals Region-specific Gene Expression Patterns”
This document uses a GeoMx dataset that has already undergone Quality control and normalization.
For details, see: geomx_dataset_and_normalization.Rmd
The following packages are needed for this document.
#
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# The following initializes most up to date version of Bioc
# BiocManager::install(version="3.15")
#
# BiocManager::install("NanoStringNCTools")
# BiocManager::install("GeomxTools")
# BiocManager::install("GeoMxWorkflows")
# Note:
# Needed to install package lme4, numderiv
library(NanoStringNCTools)
library(GeomxTools)
library(GeoMxWorkflows)
if(packageVersion("GeomxTools") < "2.1" &
packageVersion("GeoMxWorkflows") >= "1.0.1"){
stop("GeomxTools and Workflow versions do not match. Please use the same version.
This workflow is meant to be used with most current version of packages.
If you are using an older version of Bioconductor please reinstall GeoMxWorkflows and use vignette(GeoMxWorkflows) instead")
}
if(packageVersion("GeomxTools") > "2.1" &
packageVersion("GeoMxWorkflows") <= "1.0.1"){
stop("GeomxTools and Workflow versions do not match.
Please use the same version, see install instructions above.")
# to remove current package version
# remove.packages("GeomxTools")
# remove.packages("GeoMxWorkflows")
# see install instructions above
}
# for file names
library(here)
# read in xl files
library(readxl)
# Need for UMAP, tSNE plots
library(umap)
library(Rtsne)
# Needed for the heatmap plotting
library(ComplexHeatmap) #BiocManager::install("ComplexHeatmap")
#Needed for volcano plotting functions
library(ggrepel)
library(ggplot2)
library(latex2exp)
library(plyr)
library(dplyr)
# Read in images and convert to ggplot object
library(magick)
# used for some color palettes
library(RColorBrewer)
library(stringi)
library(paletteer)
library(wesanderson)
# For significance
library(ggpubr)
# For Stitching multiple plots together
library(patchwork)
library(cowplot)
library(grid)
library(tidyr)
library(readr) # needed for read_csv()
library(stringr)
For space purposes, some functions are moved to an outside document “functions.R”.
# make sure to use correct "here" function.
load(here::here("all_q_norm.Rdata"))
load(here::here("disc_q_norm.Rdata"))
load(here::here("valid_q_norm.Rdata"))
dim(all_q_norm)
## Features Samples
## 1012 153
dim(disc_q_norm)
## Features Samples
## 1026 77
dim(valid_q_norm)
## Features Samples
## 999 74
Refactor the GeoMx objects to use simplified region names.
factor_region <- function(GeoMxObj = all_q_norm){
pData(GeoMxObj)$region <- factor(pData(GeoMxObj)$region, levels = c("Fimbria", "Infundibulum", "Ampulla", "Isthmus"))
pData(GeoMxObj)$region <- revalue(pData(GeoMxObj)$region, c("Fimbria"="Fim", "Infundibulum"="Inf", "Ampulla" = "Amp", "Isthmus" = "Isth"))
return(GeoMxObj)
}
all_q_norm <- all_q_norm |> factor_region()
disc_q_norm <- disc_q_norm |> factor_region()
valid_q_norm <- valid_q_norm |> factor_region()
Subset the “all” dataset into discovery and validation datasets.
The major difference between these datasets and the “disc_q_norm” and “valid_q_norm” is that the latter were normalizzed separately. In contrast, all_disc and all_valid were normalized together and comparisons of gene expression between the two datasets are valid.
all_disc <- all_q_norm[, pData(all_q_norm)$Patient %in% c("P1", "P2", "P3")]
`%ni%` <- Negate(`%in%`)
all_valid <- all_q_norm[, pData(all_q_norm)$Patient %ni% c("P1", "P2", "P3")]
stat_files <- paste0(getwd(), "/stat_files")
stat_files_all <- paste0(stat_files, "/ALL")
stat_files_disc <- paste0(stat_files, "/Discovery")
stat_files_valid <- paste0(stat_files, "/Validation")
#secretory cells
pairwise_s_disc <- paste0(stat_files_disc, "/0.10_SECRETORY_Pairwise_region_comparison.xlsx")
pairwise_s_valid <- paste0(stat_files_valid, "/Validation_SECRETORY_Pairwise_region_comparison.xlsx")
pairwise_s_all <- paste0(stat_files_all, "/ALL_SECRETORY_Pairwise_region_comparison.xlsx")
# ciliated cells
pairwise_c_disc <- paste0(stat_files_disc, "/0.10_CILIATED_Pairwise_region_comparison.xlsx")
pairwise_c_valid <- paste0(stat_files_valid, "/Validation_CILIATED_Pairwise_region_comparison.xlsx")
pairwise_c_all <- paste0(stat_files_all, "/ALL_CILIATED_Pairwise_region_comparison.xlsx")
# create a nested list structure to store all of the dataframes in one object
pairwise_comparisons <- list()
Create list of pairwise comparisons
pairwise_comparisons[["disc"]][["sec"]] <- get_all_comparions(path = pairwise_s_disc)
pairwise_comparisons[["valid"]][["sec"]] <- get_all_comparions(path = pairwise_s_valid)
pairwise_comparisons[["all"]][["sec"]] <- get_all_comparions(path = pairwise_s_all)
pairwise_comparisons[["disc"]][["cil"]] <- get_all_comparions(path = pairwise_c_disc)
pairwise_comparisons[["valid"]][["cil"]] <- get_all_comparions(path = pairwise_c_valid)
pairwise_comparisons[["all"]][["cil"]] <- get_all_comparions(path = pairwise_c_all)
In this comparison, one region is comared to the average of all other regions, for four total comparisons
Excel doc locations
one_v_other_s_disc <- paste0(stat_files_disc, "/0.10_SECRETORY_One.VS.Others_region_comparison.xlsx")
one_v_other_s_valid <- paste0(stat_files_valid, "/Validation_SECRETORY_One.VS.Others_region_comparison.xlsx")
one_v_other_s_all <- paste0(stat_files_all, "/ALL_SECRETORY_One.VS.Others_region_comparison.xlsx")
one_v_other_c_disc <- paste0(stat_files_disc, "/0.10_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other_c_valid <- paste0(stat_files_valid, "/Validation_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other_c_all <- paste0(stat_files_all, "/ALL_CILIATED_One.VS.Others_region_comparison.xlsx")
one_v_other <- list()
comparison_one_v_other <- c("Fimbria.vs.Others", "Infundibulum.vs.Others", "Ampulla.vs.Others", "Isthmus.vs.Others")
one_v_other_names <- c("fim_v_other", "inf_v_other", "amp_v_other", "isth_v_other")
one_v_other[["disc"]][["sec"]] <- get_all_comparions(path = one_v_other_s_disc, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["valid"]][["sec"]] <- get_all_comparions(path = one_v_other_s_valid, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["all"]][["sec"]] <- get_all_comparions(path = one_v_other_s_all, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["disc"]][["cil"]] <- get_all_comparions(path = one_v_other_c_disc, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["valid"]][["cil"]] <- get_all_comparions(path = one_v_other_c_valid, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
one_v_other[["all"]][["cil"]] <- get_all_comparions(path = one_v_other_c_all, comparison_list = comparison_one_v_other, comparison_name = one_v_other_names)
OVGP1 staining of the FT as a proxy for menstrual cycle status.
A is an image and B is a table and are not included.
Supplemental Figure 1C is generated below.
OVGP1_percent <- read_xlsx(path = paste0(getwd(), "/IHC_analysis/OVGP1 Percentages.xlsx"))
OVGP1 <- OVGP1_percent |> pivot_longer(cols = c("Fim", "Inf", "Amp", "Isth"), names_to = "region", values_to = "percent") |>
mutate(percent = suppressWarnings(as.numeric(percent))) # some values are coerced to NA, but this is desired, so warning suppressed
OVGP1$region <- OVGP1$region |> factor(levels = c("Fim", "Inf", "Amp", "Isth"))
# remove slides not in GeoMx dataset
OVGP1_subset <- OVGP1[OVGP1$patient_ID %in% unique(pData(all_q_norm)$patient_ID),]
# this is necessary to merge with the GeoMx Object.
OVGP1_subset <- transform(OVGP1_subset, patient_ID = as.double(patient_ID))
OVGP1 Results Boxplot
ggplot(OVGP1_subset, aes(x = region, y = percent, fill = region)) +
geom_boxplot()+
geom_point() +
geom_line(aes(group = patient, color = patient), linetype = "dashed", linewidth = 1.2)+
theme_bw()+
scale_color_manual(values = c("#B01513FF", "#EA6312FF", "#F9A729FF", "#50C49FFF", "#54849AFF", "darkblue", "#B560D4FF"))+
scale_fill_manual(values= c("#7CAE00", "#00C1C6", "#F8766D", "#C77CFF"))+
theme(text=element_text(size=20))+
labs(y = "Percent cells OVGP1+", x = "Anatomical Region")
Supplemental Figure 2 - Biological Process Gene Ontology Analysis of differentially expressed genes for all region comparisons in the fallopian tube discovery dataset
GO_file <- "/GO_analysis"
# Discovery File Locations
GO_disc_sec <- paste0(getwd(), GO_file, "/Discovery_GO", "/Pairwise/Sec/")
GO_disc_cil <- paste0(getwd(), GO_file, "/Discovery_GO", "/Pairwise/Cil/")
# Validation File Locations
GO_valid_sec <- paste0(getwd(), GO_file, "/Validation_GO", "/Pairwise/Sec/")
GO_valid_cil <- paste0(getwd(), GO_file, "/Validation_GO", "/Pairwise/Cil/")
# Combined (all) File Locations
GO_all_sec <- paste0(getwd(), GO_file, "/All_GO", "/Pairwise/Sec/")
GO_all_cil <- paste0(getwd(), GO_file, "/All_GO", "/Pairwise/Cil/")
This function automatically reads in all pairwise comparisons (if they exist) and assigns them names. Make sure to check that all names exactly match specifications, or they will not be read in.
comparison_pairwise_GO <- c("Fim v Inf", "Fim v Amp", "Fim v Isth",
"Inf v Amp", "Inf v Isth", "Amp v Isth")
pairwise_names_GO <- c("fim_v_inf", "fim_v_amp", "fim_v_isth", "inf_v_amp", "inf_v_isth", "amp_v_isth")
# append_names <- c()
get_all_comparisons_GO <- function(path = GO_disc_cil, comparison_list = comparison_pairwise_GO, comparison_name = pairwise_names_GO, append = " GO BP Cil.csv"){
path <- paste0(path, comparison_list, append) # create full path names
path_rm <- path[file.exists(path)] # check to see if files exist and remove if not
names_rm <- comparison_name[file.exists(path)] # remove names for files removed
data_list <- lapply(path_rm, read_csv, show_col_types = F) # read in the data files
names(data_list) <- names_rm # assign names
return(data_list)
}
# create a nested list structure to store all of the dataframes in one object
GO_pairwise <- list()
GO_pairwise[["disc"]][["sec"]] <- get_all_comparisons_GO(path = GO_disc_sec, append = " GO BP Sec.csv")
GO_pairwise[["valid"]][["sec"]] <- get_all_comparisons_GO(path = GO_valid_sec, append = " GO BP Sec.csv")
GO_pairwise[["all"]][["sec"]] <- get_all_comparisons_GO(path = GO_all_sec , append = " GO BP Sec.csv")
GO_pairwise[["disc"]][["cil"]] <- get_all_comparisons_GO(path = GO_disc_cil, append = " GO BP Cil.csv")
GO_pairwise[["valid"]][["cil"]] <- get_all_comparisons_GO(path = GO_valid_cil, append = " GO BP Cil.csv")
GO_pairwise[["all"]][["cil"]] <- get_all_comparisons_GO(path = GO_all_cil, append = " GO BP Cil.csv")
names(GO_pairwise$disc$sec)
## [1] "fim_v_inf" "fim_v_amp" "fim_v_isth" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$valid$sec)
## [1] "fim_v_isth" "inf_v_amp" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$all$sec)
## [1] "fim_v_inf" "fim_v_amp" "inf_v_amp" "amp_v_isth"
names(GO_pairwise$disc$cil)
## [1] "fim_v_inf" "fim_v_amp" "fim_v_isth" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$valid$cil)
## [1] "fim_v_amp" "fim_v_isth" "inf_v_amp" "inf_v_isth" "amp_v_isth"
names(GO_pairwise$all$cil)
## [1] "fim_v_inf" "fim_v_amp" "inf_v_amp" "amp_v_isth"
GO_plots <- list()
pairwise_names <- c("fim_v_inf", "fim_v_amp", "fim_v_isth", "inf_v_isth", "amp_v_isth")
make_GO_plots <- function(dfs = GO_plots$disc$sec){
# get the names of all data frames in the set
GO_names <- names(dfs)
# for each name, generate a GO plot
output_graphs <- lapply(GO_names,
function(x){
plot_GO(data = dfs[[x]], title = x)
})
# assign names to output
names(output_graphs) <- GO_names
return(output_graphs)
}
GO_plots$disc$sec <- make_GO_plots(dfs = GO_pairwise$disc$sec)
GO_plots$disc$cil <- make_GO_plots(dfs = GO_pairwise$disc$cil)
GO_plots$valid$sec <- make_GO_plots(dfs = GO_pairwise$valid$sec)
GO_plots$valid$cil <- make_GO_plots(dfs = GO_pairwise$valid$cil)
GO_plots$all$sec <- make_GO_plots(dfs = GO_pairwise$all$sec)
GO_plots$all$cil <- make_GO_plots(dfs = GO_pairwise$all$cil)
top_1 <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: GO Plots for Secretory Cells', gp=gpar(fontsize=25)))
GO_plot_disc_sec <- GO_plots$disc$sec$fim_v_inf + GO_plots$disc$sec$fim_v_amp + GO_plots$disc$sec$fim_v_isth +
grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$sec$inf_v_isth + GO_plots$disc$sec$amp_v_isth +
plot_annotation(tag_levels = list("a")) & theme(plot.tag = element_text(size = 25))
top_2 <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: GO Plots for Ciliated Cells', gp=gpar(fontsize=25)))
GO_plot_disc_cil <-
GO_plots$disc$cil$fim_v_inf + GO_plots$disc$cil$fim_v_amp + GO_plots$disc$cil$fim_v_isth +
grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$cil$inf_v_isth + GO_plots$disc$cil$amp_v_isth +
plot_annotation(tag_levels = list("g", "h", "i", "j", "k", "l")) & theme(plot.tag = element_text(size = 25))
wrap_elements(top_1) / (GO_plot_disc_sec) / wrap_elements(top_2) / GO_plot_disc_cil +
plot_layout(heights = c(0.2, 4, 0.2, 4))
wrap_elements(top_1) / (GO_plots$disc$sec$fim_v_inf + GO_plots$disc$sec$fim_v_amp + GO_plots$disc$sec$fim_v_isth +
grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$sec$inf_v_isth + GO_plots$disc$sec$amp_v_isth) / wrap_elements(top_2) / (GO_plots$disc$cil$fim_v_inf + GO_plots$disc$cil$fim_v_amp + GO_plots$disc$cil$fim_v_isth +
grid::textGrob('No pathways enriched \n Infundibulum v Ampulla', gp=gpar(fontsize=25)) + GO_plots$disc$cil$inf_v_isth + GO_plots$disc$cil$amp_v_isth) +
plot_layout(heights = c(0.2, 4, 0.2, 4)) +
plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) & theme(plot.tag = element_text(size = 25))
Supplementary Figure 3 – Pairwise Comparisons of all Regions (Discovery Dataset).
Generate all pairwise comparisons for the discovery dataset for both secretory and ciliated cells.
df1 <- pairwise_comparisons$valid$sec
df2 <- pairwise_comparisons$valid$cil
# cil_fim_v_inf <- get_common_genes(df_1 = df1$fim_v_inf, df_2 = df2$fim_v_inf, type = "both", FC_cutoff = 0.2)
# cil_fim_v_amp <- get_common_genes(df_1 = df1$fim_v_amp, df_2 = df2$fim_v_amp, type = "both", FC_cutoff = 0.2)
# cil_fim_v_isth <- get_common_genes(df_1 = df1$fim_v_isth, df_2 = df2$fim_v_isth, type = "both", FC_cutoff = 0.2)
# cil_inf_v_amp <- get_common_genes(df_1 = df1$inf_v_amp, df_2 = df2$inf_v_amp, type = "both", FC_cutoff = 0.2)
# cil_inf_v_isth <- get_common_genes(df_1 = df1$inf_v_isth, df_2 = df2$inf_v_isth, type = "both", FC_cutoff = 0.2)
# cil_amp_v_isth <- get_common_genes(df_1 = df1$amp_v_isth, df_2 = df2$amp_v_isth, type = "both", FC_cutoff = 0.2)
title = 20
n_genes = 20
lab_size = 5
# DISCOVERY
a <- volcano_plot(df = df1$fim_v_inf,
title_name = "Fim v Inf, Secretory, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
# highlight_genes = cil_fim_v_inf
) |> add_labels(df = df1$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
b <- volcano_plot(df = df1$fim_v_amp,
title_name = "Fim v Amp, Secretory, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Ampulla"
# highlight_genes = cil_fim_v_amp
) |> add_labels(df = df1$fim_v_amp, n_genes = n_genes, lab_size = lab_size)
c <- volcano_plot(df = df1$fim_v_isth,
title_name = "Fim v Isth, Secretory, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Isthmus"
# highlight_genes = cil_fim_v_isth
) |> add_labels(df = df1$fim_v_isth, n_genes = n_genes, lab_size = lab_size)
d <- volcano_plot(df = df1$inf_v_amp,
title_name = "Inf v Amp, Secretory, Discovery",
title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
# highlight_genes = cil_inf_v_amp
) |> add_labels(df = df1$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
e <- volcano_plot(df = df1$inf_v_isth,
title_name = "Inf v Isth, Secretory, Discovery",
title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
# highlight_genes = cil_inf_v_isth
) |> add_labels(df = df1$inf_v_isth, n_genes = n_genes, lab_size = lab_size)
f <- volcano_plot(df = df1$amp_v_isth,
title_name = "Amp v Isth, Secretory, Discovery",
title_size = title, upreg = "Ampulla", downreg = "Isthmus"
# highlight_genes = cil_amp_v_isth
) |> add_labels(df = df1$amp_v_isth, n_genes = n_genes, lab_size = lab_size)
# Validation
g <- volcano_plot(df = df2$fim_v_inf,
title_name = "Fimb v Inf, Ciliated, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
# highlight_genes = cil_fim_v_inf
) |> add_labels(df = df2$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
h <- volcano_plot(df = df2$fim_v_amp,
title_name = "Fimb v Amp, Ciliated, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Ampulla"
# highlight_genes = cil_fim_v_amp
) |> add_labels(df = df2$fim_v_amp, n_genes = n_genes, lab_size = lab_size)
i <- volcano_plot(df = df2$fim_v_isth,
title_name = "Fimb v Isth, Ciliated, Discovery",
title_size = title, upreg = "Fimbria", downreg = "Isthmus"
# highlight_genes = cil_fim_v_isth
) |> add_labels(df = df2$fim_v_isth, n_genes = n_genes, lab_size = lab_size)
j <- volcano_plot(df = df2$inf_v_amp,
title_name = "Inf v Amp, Ciliated, Discovery",
title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
# highlight_genes = cil_inf_v_amp
) |> add_labels(df = df2$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
k <- volcano_plot(df = df2$inf_v_isth,
title_name = "Inf v Isth, Ciliated, Discovery",
title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
# highlight_genes = cil_inf_v_isth
) |> add_labels(df = df2$inf_v_isth, n_genes = n_genes, lab_size = lab_size)
l <- volcano_plot(df = df2$amp_v_isth,
title_name = "Amp v Isth, Ciliated, Discovery",
title_size = title, upreg = "Ampulla", downreg = "Isthmus"
# highlight_genes = cil_amp_v_isth
) |> add_labels(df = df2$amp_v_isth, n_genes = n_genes, lab_size = lab_size)
top <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset, Secretory Cells', gp=gpar(fontsize=25)))
bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset, Ciliated Cells', gp=gpar(fontsize=25)))
wrap_elements(top) / ((a | b | c ) / (d | e | f)) / wrap_elements(bottom) / ((g | h | i ) / (j | k | l)) + plot_layout(heights = c(1,15,1,15), guides = "collect") + plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) &
theme(plot.tag = element_text(size = 26))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#S4.Pairwise Comparisons volcano plots (Validation Cohort).
Supplementary Figure 4 – Pairwise Comparisons of all Regions (Validation Cohort).
df1 <- pairwise_comparisons$valid$sec
df2 <- pairwise_comparisons$valid$cil
# cil_fim_v_inf <- get_common_genes(df_1 = df1$fim_v_inf, df_2 = df2$fim_v_inf, type = "both", FC_cutoff = 0.2)
# cil_fim_v_amp <- get_common_genes(df_1 = df1$fim_v_amp, df_2 = df2$fim_v_amp, type = "both", FC_cutoff = 0.2)
# cil_fim_v_isth <- get_common_genes(df_1 = df1$fim_v_isth, df_2 = df2$fim_v_isth, type = "both", FC_cutoff = 0.2)
# cil_inf_v_amp <- get_common_genes(df_1 = df1$inf_v_amp, df_2 = df2$inf_v_amp, type = "both", FC_cutoff = 0.2)
# cil_inf_v_isth <- get_common_genes(df_1 = df1$inf_v_isth, df_2 = df2$inf_v_isth, type = "both", FC_cutoff = 0.2)
# cil_amp_v_isth <- get_common_genes(df_1 = df1$amp_v_isth, df_2 = df2$amp_v_isth, type = "both", FC_cutoff = 0.2)
title = 20
n_genes = 20
lab_size = 5
# DISCOVERY
a <- volcano_plot(df = df1$fim_v_inf,
title_name = "Fim v Inf, Secretory, Validation",
title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
# highlight_genes = cil_fim_v_inf
) |> add_labels(df = df1$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
b <- volcano_plot(df = df1$fim_v_amp,
title_name = "Fim v Amp, Secretory, Validation",
title_size = title, upreg = "Fimbria", downreg = "Ampulla"
# highlight_genes = cil_fim_v_amp
) |> add_labels(df = df1$fim_v_amp, n_genes = n_genes, lab_size = lab_size)
c <- volcano_plot(df = df1$fim_v_isth,
title_name = "Fim v Isth, Secretory, Validation",
title_size = title, upreg = "Fimbria", downreg = "Isthmus"
# highlight_genes = cil_fim_v_isth
) |> add_labels(df = df1$fim_v_isth, n_genes = n_genes, lab_size = lab_size)
d <- volcano_plot(df = df1$inf_v_amp,
title_name = "Inf v Amp, Secretory, Validation",
title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
# highlight_genes = cil_inf_v_amp
) |> add_labels(df = df1$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
e <- volcano_plot(df = df1$inf_v_isth,
title_name = "Inf v Isth, Secretory, Validation",
title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
# highlight_genes = cil_inf_v_isth
) |> add_labels(df = df1$inf_v_isth, n_genes = n_genes, lab_size = lab_size)
f <- volcano_plot(df = df1$amp_v_isth,
title_name = "Amp v Isth, Secretory, Validation",
title_size = title, upreg = "Ampulla", downreg = "Isthmus"
# highlight_genes = cil_amp_v_isth
) |> add_labels(df = df1$amp_v_isth, n_genes = n_genes, lab_size = lab_size)
# Validation
g <- volcano_plot(df = df2$fim_v_inf,
title_name = "Fimb v Inf, Ciliated, Validation",
title_size = title, upreg = "Fimbria", downreg = "Infundibulum"
# highlight_genes = cil_fim_v_inf
) |> add_labels(df = df2$fim_v_inf, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
h <- volcano_plot(df = df2$fim_v_amp,
title_name = "Fimb v Amp, Ciliated, Validation",
title_size = title, upreg = "Fimbria", downreg = "Ampulla"
# highlight_genes = cil_fim_v_amp
) |> add_labels(df = df2$fim_v_amp, n_genes = n_genes, lab_size = lab_size)
i <- volcano_plot(df = df2$fim_v_isth,
title_name = "Fimb v Isth, Ciliated, Validation",
title_size = title, upreg = "Fimbria", downreg = "Isthmus"
# highlight_genes = cil_fim_v_isth
) |> add_labels(df = df2$fim_v_isth, n_genes = n_genes, lab_size = lab_size)
j <- volcano_plot(df = df2$inf_v_amp,
title_name = "Inf v Amp, Ciliated, Validation",
title_size = title, upreg = "Infundibulum", downreg = "Ampulla"
# highlight_genes = cil_inf_v_amp
) |> add_labels(df = df2$inf_v_amp, n_genes = n_genes, lab_size = lab_size) |> remove_legend()
k <- volcano_plot(df = df2$inf_v_isth,
title_name = "Inf v Isth, Ciliated, Validation",
title_size = title, upreg = "Infundibulum", downreg = "Isthmus"
# highlight_genes = cil_inf_v_isth
) |> add_labels(df = df2$inf_v_isth, n_genes = n_genes, lab_size = lab_size)
l <- volcano_plot(df = df2$amp_v_isth,
title_name = "Amp v Isth, Ciliated, Validation",
title_size = title, upreg = "Ampulla", downreg = "Isthmus"
# highlight_genes = cil_amp_v_isth
) |> add_labels(df = df2$amp_v_isth, n_genes = n_genes, lab_size = lab_size)
top <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset, Secretory Cells', gp=gpar(fontsize=25)))
bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset, Ciliated Cells', gp=gpar(fontsize=25)))
wrap_elements(top) / ((a | b | c ) / (d | e | f)) / wrap_elements(bottom) / ((g | h | i ) / (j | k | l)) + plot_layout(heights = c(1,15,1,15), guides = "collect") + plot_annotation(tag_levels = list(c("", "a", "b", "c", "d", "e", "f", "", "g", "h", "i", "j", "k", "l"))) &
theme(plot.tag = element_text(size = 26))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#S5. Regional comparison volcano plots (validation dataset)
Supplementary Figure 5. Regional comparison volcano plots showing transcripts upregulated and downregulated in secretory and ciliated ROIs in the validation cohort.
valid_sec <- one_v_other$valid$sec
volcano_plot(df = valid_sec$fim_v_other,
title_name = "Fimbria Secretory Cells, Validation", title_size = title, upreg = "Fimbria", downreg = "Others"
) |> add_labels(df = valid_sec$fim_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_sec$inf_v_other,
title_name = "Infundibulum Secretory Cells, Validation", title_size = title, upreg = "Infundibulum", downreg = "Others"
) |> add_labels(df = valid_sec$inf_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_sec$amp_v_other,
title_name = "Ampulla Secretory Cells, Validation", title_size = title, upreg = "Ampulla", downreg = "Others"
) |> add_labels(df = valid_sec$amp_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_sec$isth_v_other,
title_name = "Isthmus Secretory Cells, Validation", title_size = title, upreg = "Isthmus", downreg = "Others"
) |> add_labels(df = valid_sec$isth_v_other, n_genes = 15) |> remove_legend()
valid_cil <- one_v_other$valid$cil
volcano_plot(df = valid_cil$fim_v_other,
title_name = "Fimbria Ciliated Cells, Validation", title_size = title, upreg = "Fimbria", downreg = "Others"
) |> add_labels(df = valid_cil$fim_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_cil$inf_v_other,
title_name = "Infundibulum Ciliated Cells, Validation", title_size = title, upreg = "Infundibulum", downreg = "Others"
) |> add_labels(df = valid_cil$inf_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_cil$amp_v_other,
title_name = "Ampulla Ciliated Cells, Validation", title_size = title, upreg = "Ampulla", downreg = "Others"
) |> add_labels(df = valid_cil$amp_v_other, n_genes = 15) |> remove_legend()
volcano_plot(df = valid_cil$isth_v_other,
title_name = "Isthmus Ciliated Cells, Validation", title_size = title, upreg = "Isthmus", downreg = "Others"
) |> add_labels(df = valid_cil$isth_v_other, n_genes = 15) |> remove_legend()
Supplemental Figure 6 – Markers of Mature Ciliated Cells are upregulated in the distal fallopian tube
Supplementary Figure 7. Other MHC-II Transcripts in the FT epithelium.
Supplementary Figure 8. Multiple datasets show MHC-II transcript expression varies throughout the menstrual cycle and is inversely correlated with OVGP1.
Analysis of publicly available FT transcriptome datasets.
This analysis is too long to fit neatly here, so I have separated it into two files.
For the Sowamber paper (figure S8A and S8B) see Sowamber Paper Analysis.Rmd.
Supplementary Figure 9. Peg cell markers elevated in the interior FT during follicular phase.
Markers of Non-ciliated epithelial (NCE) cells are available publically from Ulrich 2022. (DOI: 10.1016/j.devcel.2022.02.017)
The paper identifies probable peg cell cluster (OVGP1 producing secretory cells) as 2-5.
## Function to retrieve Gene IDs
pull_gene <- function(df = NULL, cluster_name = NULL){
out <- df |> filter(cluster == cluster_name) |> pull(gene)
return(out)
}
# Retrieve PEG cell markers identified in this excel document
source_file_NCE <- paste0(getwd(), "/ulrich_files/Ulrich_NCE_Markers.xlsx")
# read in excel document
NCSE_markers <- read_xlsx(source_file_NCE, skip = 4, range = cell_cols("B:H"))
# Filter out top markers (best cell type distinguishing capacity)
SE5_top <- NCSE_markers |> filter(cluster == "2-5") |> filter((pct.1 - pct.2) > 0.5) # PEG Cell Markers
# get list of genes in top markers
peg_markers <- SE5_top$gene
# Alternative, for looking at all markers significantly higher in peg cells
# SE5_markers <- NCSE_markers |> pull_gene(cluster_name = "2-5") # ALL PEG Markers
Subset the dataset to look only at secretory cells, then perform t-test comparing regions of interest.
summary: perform t-test comparing all genes between two different selected regions.
t.test_logfold <- function(obj, variable_type = "region", upreg_group = "Fimbria", downreg_group = "Isthmus", elt_name = "q_norm"){
# Get locations of all in the "up" group for that variable
g_up <- which(pData(obj)[variable_type] == upreg_group)
# get locations of all in the "down" group for that variable
g_down <- which(pData(obj)[variable_type] == downreg_group)
# Retrieve normalized expression data from the GeoMxDataSet Object
obj <- assayDataElement(object = obj, elt = elt_name)
# For each gene (every row), perform a T-test comparing the columns in the
#up_group vs down_group
t.test <- apply(obj, 1,
function(x) t.test(
x[g_up],
x[g_down]))
#calculate the log_2 Fold Change of up_group compared to down for each row
log_fold <- apply(obj, 1,
function(x) log2(mean(x[g_up])/mean(x[g_down]))
)
# convert to data frame
log_fold <- as.data.frame(log_fold)
# retrieve the p.values from the t.test object and store to a datafram
df <- do.call(rbind, lapply(t.test, function(x) c(
p_value = unname(x$p.value))))
# create an adjusted p_value using the Bonferooni correction
df <- transform(df, padj = p.adjust(p_value, method = "BH"))
# merge the log_fold values with the p_values
output <- merge(df, log_fold, by = 0)
# rename the column rownames to Marker.name
output <- output |> dplyr::rename(Marker.name = Row.names)
# Add the -log10 of the pvalue for plotting
output <- output |> mutate(minuslog10_Pvalue = -log10(p_value))
return(output)
}
# t.test_logfold_region(obj = all_q_norm, group_up = "Isth", group_down = "Fim")
# subset the discovery datset to look only at expression in Secretory Cells
disc_q_norm_sec <- disc_q_norm[, pData(disc_q_norm)$segment %in% c("Secretory")]
# perform t_tests for all regions
disc_isth.v.fim <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Isth", downreg_group = "Fim")
disc_inf.v.fim <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Inf", downreg_group = "Fim")
disc_inf.v.amp <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Amp", downreg_group = "Inf")
disc_amp.v.isth <- t.test_logfold(obj = disc_q_norm_sec, upreg_group = "Isth", downreg_group = "Amp")
# A <- volcano_plot(df = disc_isth.v.fim, title_name = "Fimbria vs Isthmus Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Fimbria", FC_cutoff =0.5) |> add_labels(df = disc_isth.v.fim, highlight_genes = peg_markers, n_genes = 0)
s9_fim_v_inf <- volcano_plot(df = disc_inf.v.fim, title_name = "Fimbria vs Infundibulum Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Infundibulum", downreg = "Fimbria", FC_cutoff =0.5) |> add_labels(df = disc_inf.v.fim, highlight_genes = peg_markers, n_genes = 0) |> remove_legend() + theme(plot.title = element_text(size=16))
s9_inf_v_amp <- volcano_plot(df = disc_inf.v.amp, title_name = "Infundibulum vs Ampulla Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Ampulla", downreg = "Infundibulum", FC_cutoff =0.5)|> add_labels(df = disc_inf.v.amp, highlight_genes = peg_markers, n_genes = 0) |> remove_legend() + theme(plot.title = element_text(size=16))
s9_amp_v_isth <- volcano_plot(df = disc_amp.v.isth, title_name = "Ampulla vs Isthmus Comparison (Discovery)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Ampulla", FC_cutoff =0.5) |> add_labels(df = disc_amp.v.isth, highlight_genes = peg_markers, n_genes = 0) |> remove_legend() + theme(plot.title = element_text(size=16))
#
# valid_q_norm_sec <- valid_q_norm[, pData(valid_q_norm)$segment %in% c("Secretory")]
#
#
# valid_isth.v.fim <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Isth", downreg_group = "Fim")
#
# valid_inf.v.fim <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Inf", downreg_group = "Fim")
#
# valid_inf.v.amp <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Amp", downreg_group = "Inf")
#
# valid_amp.v.isth <- t.test_logfold(obj = valid_q_norm_sec, upreg_group = "Isth", downreg_group = "Amp")
#
#
# Q <- volcano_plot(df = valid_isth.v.fim, title_name = "Fimbria vs Isthmus Comparison (Validation)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Fimbria", FC_cutoff =0.5)
#
# R <- volcano_plot(df = valid_inf.v.fim, title_name = "Fimbria vs Infundibulum Comparison (Validation)", highlight_genes = peg_markers, upreg = "Infundibulum", downreg = "Fimbria", FC_cutoff =0.5)
#
# S <- volcano_plot(df = valid_inf.v.amp, title_name = "Infundibulum vs Ampulla Comparison (Validation)", highlight_genes = peg_markers, upreg = "Ampulla", downreg = "Infundibulum", FC_cutoff =0.5)
#
#
# V <- volcano_plot(df = valid_amp.v.isth, title_name = "Isthmus vs Ampulla Comparison (Validation)", highlight_genes = peg_markers, upreg = "Isthmus", downreg = "Ampulla", FC_cutoff =0.5)
# (s9_fim_v_inf + s9_inf_v_amp + s9_amp_v_isth)
peg_markers_s9 <- c("LGR5", "NOTCH2", "COL1A2", "SERINC5")
# create settings for boxplots
make_figs9_boxplots <- function(genes = Fig5_genes, dataset = all_q_norm, title = "All"){
graphs <- lapply(X = genes, FUN = Cil_v_Sec_Anat_Boxplot_Points, dataset = dataset)
# resize
graph_resize <- lapply(graphs, adj_text_size, pnt = 17)
graph_remove_legend <- lapply(graph_resize, remove_legend)
graph_sig <- lapply(graph_remove_legend, add_sig, pnt = 4)
return(graph_sig)
}
# Create all of the Boxplots for the Discovery Dataset
fs9_disc <- make_figs9_boxplots(genes = peg_markers_s9 , dataset = disc_q_norm)
names(fs9_disc) <- peg_markers_s9
# Create all of the Boxplots for the Validation Dataset
fs9_valid <- make_figs9_boxplots(genes = peg_markers_s9 , dataset = valid_q_norm)
names(fs9_valid) <- peg_markers_s9
# Create Grobs for use as labels
top <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: OVGP1+ (peg cell) transcripts', gp=gpar(fontsize=25)))
bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset: OVGP1+ (peg cell) transcripts', gp=gpar(fontsize=25)))
# create legends
legend_disc <- make_boxplot_legend(dataset = disc_q_norm)
legend_valid <- make_boxplot_legend(dataset = valid_q_norm)
# Part 1 - volcano plots
(s9_fim_v_inf | s9_inf_v_amp | s9_amp_v_isth) /
# Part 2 - disc PEG Transcripts
wrap_elements(top) /
(fs9_disc$`LGR5` | fs9_disc$`NOTCH2` | fs9_disc$`COL1A2` | fs9_disc$`SERINC5`) /
legend_disc /
# Part 3 - valid PEG Transcripts
wrap_elements(bottom) /
(fs9_valid$`LGR5` | fs9_valid$`NOTCH2` | fs9_valid$`COL1A2` | fs9_valid$`SERINC5`) /
legend_valid + plot_layout(heights =
c(3, # Part 1
0.5, 4, 0.5, # Part 2
0.5, 4, 0.5 # Part 3
))
Supplemental Figure 9 – Hormone Receptors and associated Pioneer Transcription Factors show fluctuating expression across fallopian tube driven by the menstrual cycle
# This function makes all of the boxplots for figure S10 according to the same specifications
FigS10_gene_list <- c("ESR1", "PGR", "AR", "FOXA2", "PBX1")
make_figS10 <- function(genes = FigS10_gene_list, dataset = all_q_norm, title = "All"){
graphs_up_Fig <- lapply(X = genes, FUN = Cil_v_Sec_Anat_Boxplot_Points, dataset = dataset)
Fig_resize <- lapply(graphs_up_Fig, adj_text_size, pnt = 17)
Fig_remove_legend <- lapply(Fig_resize, remove_legend)
Fig_sig <- lapply(Fig_remove_legend, add_sig, pnt = 4)
return(Fig_sig)
}
# Create all of the Boxplots for the Discovery Dataset
FS10_disc <- make_figS10(genes = FigS10_gene_list , dataset = disc_q_norm)
names(FS10_disc) <- FigS10_gene_list
# Create all of the Boxplots for the Validation Dataset
FS10_valid <- make_figS10(genes = FigS10_gene_list , dataset = valid_q_norm)
names(FS10_valid) <- FigS10_gene_list
# create grobs
top <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Discovery Dataset: Hormone Receptors and Associated Pioneer Transcription Factors', gp=gpar(fontsize=25)))
bottom <- grobTree(rectGrob(gp=gpar(fill='#F0F0F0',col= 'black')), textGrob('Validation Dataset: Hormone Receptors and Associated Pioneer Transcription Factors', gp=gpar(fontsize=25)))
wrap_elements(top) /
(FS10_disc$ESR1 | FS10_disc$PGR | FS10_disc$AR | FS10_disc$FOXA2 | FS10_disc$PBX1) /
legend_disc /
wrap_elements(bottom) /
(FS10_valid$ESR1 | FS10_valid$PGR | FS10_valid$AR | FS10_valid$FOXA2 | FS10_valid$PBX1) /
legend_valid +
plot_layout(heights = c(0.5, 4, 0.5, 0.5, 4, 0.5))